From 230dbf6a59daeddfd016fdaae65155be1942665d Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 6 Aug 2014 08:01:17 -0700 Subject: [PATCH] Use libgit2 for driving git instead of the CLI In general relying on external programs is dicey and tricky as they're very different across systems in both how they're used as well as what versions you'll find. Instead of binding to the least common denominator of CLI, we can code against an exact version of libgit2. This introduces a build-time dependency on cmake which libgit2 requires to build itself, which is unfortunate, but thankfully it's only a build-time dependency. The build process for libgit2 also automatically detects as many system libraries as possible to use (if available), falling back to bundled versions if not available. I have currently not figured out how to control this, so the link-config package is used to build libgit2 which requires that pkg-config be installed to build cargo as well. Closes #138 --- Cargo.lock | 22 +++ Cargo.toml | 5 +- configure | 2 + src/cargo/core/resolver.rs | 4 +- src/cargo/core/source.rs | 4 +- src/cargo/lib.rs | 1 + src/cargo/ops/cargo_new.rs | 32 ++-- src/cargo/sources/git/source.rs | 2 +- src/cargo/sources/git/utils.rs | 273 +++++++++++++++----------------- src/cargo/util/errors.rs | 7 + src/cargo/util/to_url.rs | 8 + 11 files changed, 195 insertions(+), 165 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4ac43613a..4d8d2e17f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,6 +4,7 @@ version = "0.0.1-pre" dependencies = [ "docopt 0.6.0 (git+https://github.com/burntsushi/docopt.rs#fd2377d1c36b2671136cd36566aad5d54c2fb17e)", "docopt_macros 0.6.0 (git+https://github.com/burntsushi/docopt.rs#fd2377d1c36b2671136cd36566aad5d54c2fb17e)", + "git2 0.0.1 (git+https://github.com/alexcrichton/git2-rs#7f03391f1dc9258daa30cc0d2d3433e05826f692)", "hamcrest 0.1.0 (git+https://github.com/carllerche/hamcrest-rust.git#c23b8769f20f306c59a96b22532bb09b33faa764)", "semver 0.0.1 
(git+https://github.com/rust-lang/semver#e17191f51d543529a6f07e6731802b77977fcef8)", "toml 0.1.0 (git+https://github.com/alexcrichton/toml-rs#934e093047ae15432fcc772d4e01fdf5fd56d2fb)", @@ -28,11 +29,32 @@ name = "encoding" version = "0.1.0" source = "git+https://github.com/lifthrasiir/rust-encoding#b82ad2104b2d079620bd227fb9328b2ff8c20ca9" +[[package]] +name = "git2" +version = "0.0.1" +source = "git+https://github.com/alexcrichton/git2-rs#7f03391f1dc9258daa30cc0d2d3433e05826f692" +dependencies = [ + "libgit2 0.0.1 (git+https://github.com/alexcrichton/git2-rs#7f03391f1dc9258daa30cc0d2d3433e05826f692)", +] + [[package]] name = "hamcrest" version = "0.1.0" source = "git+https://github.com/carllerche/hamcrest-rust.git#c23b8769f20f306c59a96b22532bb09b33faa764" +[[package]] +name = "libgit2" +version = "0.0.1" +source = "git+https://github.com/alexcrichton/git2-rs#7f03391f1dc9258daa30cc0d2d3433e05826f692" +dependencies = [ + "link-config 0.0.1 (git+http://github.com/alexcrichton/link-config#f08103ea7d2e2d3369c2c5e66b0220c8d16b92c9)", +] + +[[package]] +name = "link-config" +version = "0.0.1" +source = "git+http://github.com/alexcrichton/link-config#f08103ea7d2e2d3369c2c5e66b0220c8d16b92c9" + [[package]] name = "semver" version = "0.0.1" diff --git a/Cargo.toml b/Cargo.toml index 4c17e2236..0fe524f90 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ git = "https://github.com/burntsushi/docopt.rs" [dependencies.toml] git = "https://github.com/alexcrichton/toml-rs" -[dependencies.hamcrest] +[dev-dependencies.hamcrest] git = "https://github.com/carllerche/hamcrest-rust.git" [dependencies.url] @@ -28,6 +28,9 @@ git = "https://github.com/servo/rust-url" [dependencies.semver] git = "https://github.com/rust-lang/semver" +[dependencies.git2] +git = "https://github.com/alexcrichton/git2-rs" + [[bin]] name = "cargo" test = false diff --git a/configure b/configure index f9a34c04e..a263e55c4 100755 --- a/configure +++ b/configure @@ -262,6 +262,8 @@ need_cmd date need_cmd 
tr need_cmd sed need_cmd file +need_cmd cmake +need_cmd pkg-config CFG_SRC_DIR="$(cd $(dirname $0) && pwd)/" CFG_BUILD_DIR="$(pwd)/" diff --git a/src/cargo/core/resolver.rs b/src/cargo/core/resolver.rs index 43cb2e3b4..92d81d8dd 100644 --- a/src/cargo/core/resolver.rs +++ b/src/cargo/core/resolver.rs @@ -2,7 +2,8 @@ use std::collections::HashMap; use std::fmt; use serialize::{Encodable, Encoder, Decodable, Decoder}; -use util::graph::{Nodes,Edges}; +use util::profile; +use util::graph::{Nodes, Edges}; use core::{ Dependency, @@ -238,6 +239,7 @@ impl<'a, R: Registry> Context<'a, R> { pub fn resolve(root: &PackageId, deps: &[Dependency], registry: &mut R) -> CargoResult { log!(5, "resolve; deps={}", deps); + let _p = profile::start(format!("resolving: {}", root)); let mut context = Context::new(registry, root.clone()); try!(resolve_deps(root, deps, &mut context)); diff --git a/src/cargo/core/source.rs b/src/cargo/core/source.rs index ae415e130..511281a3e 100644 --- a/src/cargo/core/source.rs +++ b/src/cargo/core/source.rs @@ -212,9 +212,7 @@ impl SourceId { // Pass absolute path pub fn for_path(path: &Path) -> CargoResult { - let url = try!(Url::from_file_path(path).map_err(|()| { - human(format!("not a valid path for a URL: {}", path.display())) - })); + let url = try!(path.to_url().map_err(human)); Ok(SourceId::new(PathKind, url)) } diff --git a/src/cargo/lib.rs b/src/cargo/lib.rs index 89baab021..0240a36f3 100644 --- a/src/cargo/lib.rs +++ b/src/cargo/lib.rs @@ -17,6 +17,7 @@ extern crate url; #[phase(plugin, link)] extern crate log; extern crate docopt; +extern crate git2; extern crate toml; #[cfg(test)] extern crate hamcrest; diff --git a/src/cargo/ops/cargo_new.rs b/src/cargo/ops/cargo_new.rs index c9f1ccdf0..367115ff0 100644 --- a/src/cargo/ops/cargo_new.rs +++ b/src/cargo/ops/cargo_new.rs @@ -2,12 +2,10 @@ use std::os; use std::io; use std::io::{fs, File}; -use util::{CargoResult, human, ChainError, process}; -use core::shell::MultiShell; +use 
git2::{Repository, Config}; -macro_rules! git( ($($a:expr),*) => ({ - process("git") $(.arg($a))* .exec_with_output() -}) ) +use util::{CargoResult, human, ChainError}; +use core::shell::MultiShell; pub struct NewOptions<'a> { pub git: bool, @@ -31,7 +29,7 @@ pub fn new(opts: NewOptions, _shell: &mut MultiShell) -> CargoResult<()> { fn mk(path: &Path, name: &str, opts: &NewOptions) -> CargoResult<()> { if opts.git { - try!(git!("init", path)); + try!(Repository::init(path)); let mut gitignore = "/target\n".to_string(); if !opts.bin { gitignore.push_str("/Cargo.lock\n"); @@ -70,19 +68,17 @@ fn it_works() { } fn discover_author() -> CargoResult { - let name = match git!("config", "user.name") { - Ok(out) => String::from_utf8_lossy(out.output.as_slice()).into_string(), - Err(..) => match os::getenv("USER") { - Some(user) => user, - None => return Err(human("could not determine the current user, \ - please set $USER")) - } - }; - - let email = match git!("config", "user.email") { - Ok(out) => Some(String::from_utf8_lossy(out.output.as_slice()).into_string()), - Err(..) 
=> None, + let git_config = Config::open_default().ok(); + let git_config = git_config.as_ref(); + let name = git_config.and_then(|g| g.get_str("user.name").ok()) + .map(|s| s.to_string()) + .or_else(|| os::getenv("USER")); + let name = match name { + Some(name) => name, + None => return Err(human("could not determine the current user, \ + please set $USER")) }; + let email = git_config.and_then(|g| g.get_str("user.email").ok()); let name = name.as_slice().trim().to_string(); let email = email.map(|s| s.as_slice().trim().to_string()); diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 0fa87f2bb..633b470d3 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -171,7 +171,7 @@ impl<'a, 'b> Source for GitSource<'a, 'b> { let rev = try!(repo.rev_for(self.reference.as_slice())); (repo, rev) } else { - (self.remote.db_at(&self.db_path), actual_rev.unwrap()) + (try!(self.remote.db_at(&self.db_path)), actual_rev.unwrap()) }; try!(repo.copy_to(actual_rev.clone(), &self.checkout_path)); diff --git a/src/cargo/sources/git/utils.rs b/src/cargo/sources/git/utils.rs index fec0b23ed..3894879e6 100644 --- a/src/cargo/sources/git/utils.rs +++ b/src/cargo/sources/git/utils.rs @@ -4,8 +4,9 @@ use std::io::{UserDir}; use std::io::fs::{mkdir_recursive,rmdir_recursive}; use serialize::{Encodable,Encoder}; use url::Url; +use git2; -use util::{CargoResult, ChainError, ProcessBuilder, process, human}; +use util::{CargoResult, ChainError, human, ToUrl, internal, Require}; #[deriving(PartialEq,Clone,Encodable)] pub enum GitReference { @@ -54,22 +55,6 @@ impl Show for GitRevision { } } -macro_rules! git( - ($config:expr, $($arg:expr),+) => ( - try!(git_inherit(&$config, process("git")$(.arg($arg))*)) - ) -) - -macro_rules! git_output( - ($config:expr, $($arg:expr),*) => ({ - try!(git_output(&$config, process("git")$(.arg($arg))*)) - }) -) - -macro_rules! 
errln( - ($($arg:tt)*) => (let _ = writeln!(::std::io::stdio::stderr(), $($arg)*)) -) - /// GitRemote represents a remote repository. It gets cloned into a local /// GitDatabase. #[deriving(PartialEq,Clone,Show)] @@ -92,10 +77,10 @@ impl> Encodable for GitRemote { /// GitDatabase is a local clone of a remote repository's database. Multiple /// GitCheckouts can be cloned from this GitDatabase. -#[deriving(PartialEq,Clone)] pub struct GitDatabase { remote: GitRemote, path: Path, + repo: git2::Repository, } #[deriving(Encodable)] @@ -116,25 +101,29 @@ impl> Encodable for GitDatabase { /// GitCheckout is a local checkout of a particular revision. Calling /// `clone_into` with a reference will resolve the reference into a revision, /// and return a CargoError if no revision for that reference was found. -pub struct GitCheckout { - database: GitDatabase, +pub struct GitCheckout<'a> { + database: &'a GitDatabase, location: Path, revision: GitRevision, + repo: git2::Repository, } #[deriving(Encodable)] pub struct EncodableGitCheckout { - database: GitDatabase, + database: EncodableGitDatabase, location: String, revision: String, } -impl> Encodable for GitCheckout { +impl<'a, E, S: Encoder> Encodable for GitCheckout<'a> { fn encode(&self, s: &mut S) -> Result<(), E> { EncodableGitCheckout { - database: self.database.clone(), location: self.location.display().to_string(), - revision: self.revision.to_string() + revision: self.revision.to_string(), + database: EncodableGitDatabase { + remote: self.database.remote.clone(), + path: self.database.path.display().to_string(), + }, }.encode(s) } } @@ -152,50 +141,48 @@ impl GitRemote { pub fn rev_for(&self, path: &Path, reference: S) -> CargoResult { - // We simultaneously want to transform the reference into a resolved - // revision as well as verify that the reference itself is inside the - // repository. 
Sadly for a 40-character SHA1 the call to `rev-parse` - // will *always* return the same string with a 0 exit status, regardless - // of whether it's present in the database. - // - // Later versions of git introduced a syntax for this query via - // `$sha1^{object}`, but older versions of git do not support this. To - // get around this limitation, we chop 40-character sha revisions to 39 - // characters to get an error'd exit status if the revision is indeed - // not present. - let mut reference = reference.as_slice(); - if reference.len() == 40 { - reference = reference.slice_to(39); - } - Ok(GitRevision(git_output!(*path, "rev-parse", reference))) + let db = try!(self.db_at(path)); + db.rev_for(reference) } pub fn checkout(&self, into: &Path) -> CargoResult { - if into.exists() { - try!(self.fetch_into(into)); + let repo = if into.exists() { + let r = try!(git2::Repository::open(into)); + try!(self.fetch_into(&r)); + r } else { - try!(self.clone_into(into)); - } + try!(self.clone_into(into)) + }; - Ok(GitDatabase { remote: self.clone(), path: into.clone() }) + Ok(GitDatabase { remote: self.clone(), path: into.clone(), repo: repo }) } - pub fn db_at(&self, db_path: &Path) -> GitDatabase { - GitDatabase { remote: self.clone(), path: db_path.clone() } + pub fn db_at(&self, db_path: &Path) -> CargoResult { + let repo = try!(git2::Repository::open(db_path)); + Ok(GitDatabase { + remote: self.clone(), + path: db_path.clone(), + repo: repo, + }) } - fn fetch_into(&self, path: &Path) -> CargoResult<()> { - Ok(git!(*path, "fetch", "--force", "--quiet", "--tags", - self.url.to_string(), "refs/heads/*:refs/heads/*")) + fn fetch_into(&self, dst: &git2::Repository) -> CargoResult<()> { + let url = self.url.to_string(); + let refspec = "refs/heads/*:refs/heads/*"; + let mut remote = try!(dst.remote_create_anonymous(url.as_slice(), + refspec)); + try!(remote.add_fetch("refs/tags/*:refs/tags/*")); + let sig = try!(git2::Signature::default(dst)); + try!(remote.fetch(&sig, 
None)); + Ok(()) } - fn clone_into(&self, path: &Path) -> CargoResult<()> { - let dirname = Path::new(path.dirname()); - - try!(mkdir_recursive(path, UserDir)); - - Ok(git!(dirname, "clone", self.url.to_string(), path, "--bare", - "--no-hardlinks", "--quiet")) + fn clone_into(&self, dst: &Path) -> CargoResult { + let url = self.url.to_string(); + let repo = try!(git2::build::RepoBuilder::new().bare(true) + .hardlinks(false) + .clone(url.as_slice(), dst)); + Ok(repo) } } @@ -206,8 +193,7 @@ impl GitDatabase { pub fn copy_to(&self, rev: GitRevision, dest: &Path) -> CargoResult { - let checkout = try!(GitCheckout::clone_into(dest, self.clone(), - rev.clone())); + let checkout = try!(GitCheckout::clone_into(dest, self, rev.clone())); match self.remote.rev_for(dest, "HEAD") { Ok(ref head) if rev == *head => {} @@ -220,121 +206,126 @@ impl GitDatabase { } pub fn rev_for(&self, reference: S) -> CargoResult { - self.remote.rev_for(&self.path, reference) + let rev = try!(self.repo.revparse_single(reference.as_slice())); + Ok(GitRevision(rev.id().to_string())) } pub fn has_ref(&self, reference: S) -> CargoResult<()> { - git_output!(self.path, "rev-parse", "--verify", reference.as_slice()); + try!(self.repo.revparse_single(reference.as_slice())); Ok(()) } } -impl GitCheckout { - fn clone_into(into: &Path, database: GitDatabase, - revision: GitRevision) -> CargoResult { - let checkout = GitCheckout { +impl<'a> GitCheckout<'a> { + fn clone_into<'a>(into: &Path, database: &'a GitDatabase, + revision: GitRevision) -> CargoResult> { + // If the git checkout already exists, we don't need to clone it again + let repo = match git2::Repository::open(into) { + Ok(repo) => repo, + Err(..) 
=> try!(GitCheckout::clone_repo(database.get_path(), into)), + }; + Ok(GitCheckout { location: into.clone(), database: database, revision: revision, - }; - - // If the git checkout already exists, we don't need to clone it again - if !checkout.location.join(".git").exists() { - try!(checkout.clone_repo()); - } - - Ok(checkout) - } - - fn get_source(&self) -> &Path { - self.database.get_path() + repo: repo, + }) } pub fn get_rev(&self) -> &str { self.revision.as_slice() } - fn clone_repo(&self) -> CargoResult<()> { - let dirname = Path::new(self.location.dirname()); + fn clone_repo(source: &Path, into: &Path) -> CargoResult { + let dirname = into.dir_path(); try!(mkdir_recursive(&dirname, UserDir).chain_error(|| { - human(format!("Couldn't mkdir {}", - Path::new(self.location.dirname()).display())) + human(format!("Couldn't mkdir {}", dirname.display())) })); - if self.location.exists() { - try!(rmdir_recursive(&self.location).chain_error(|| { - human(format!("Couldn't rmdir {}", - Path::new(&self.location).display())) + if into.exists() { + try!(rmdir_recursive(into).chain_error(|| { + human(format!("Couldn't rmdir {}", into.display())) })); } - git!(dirname, "clone", "--no-checkout", "--quiet", - self.get_source(), &self.location); - try!(self.reset()); - - Ok(()) + let url = try!(source.to_url().map_err(human)); + let url = url.to_string(); + let repo = try!(git2::Repository::clone(url.as_slice(), into)); + Ok(repo) } fn fetch(&self) -> CargoResult<()> { - // In git 1.8, apparently --tags explicitly *only* fetches tags, it does - // not fetch anything else. In git 1.9, however, git apparently fetches - // everything when --tags is passed. - // - // This means that if we want to fetch everything we need to execute - // both with and without --tags on 1.8 (apparently), and only with - // --tags on 1.9. For simplicity, we execute with and without --tags for - // all gits. - // - // FIXME: This is suspicious. 
I have been informed that, for example, - // bundler does not do this, yet bundler appears to work! - // - // And to continue the fun, git before 1.7.3 had the fun bug that if a - // branch was tracking a remote, then `git fetch $url` doesn't work! - // - // For details, see - // https://www.kernel.org/pub/software/scm/git/docs/RelNotes-1.7.3.txt - // - // In this case we just use `origin` here instead of the database path. - git!(self.location, "fetch", "--force", "--quiet", "origin"); - git!(self.location, "fetch", "--force", "--quiet", "--tags", "origin"); + info!("fetch {}", self.repo.path().display()); + let mut remote = try!(self.repo.remote_load("origin")); + try!(remote.add_fetch("refs/tags/*:refs/tags/*")); + let sig = try!(git2::Signature::default(&self.repo)); + try!(remote.fetch(&sig, None)); try!(self.reset()); Ok(()) } fn reset(&self) -> CargoResult<()> { - Ok(git!(self.location, "reset", "-q", "--hard", - self.revision.as_slice())) + info!("reset {} to {}", self.repo.path().display(), + self.revision.as_slice()); + let sig = try!(git2::Signature::default(&self.repo)); + let oid = try!(git2::Oid::from_str(self.revision.as_slice())); + let object = try!(git2::Object::lookup(&self.repo, oid, None)); + try!(self.repo.reset(&object, git2::Hard, &sig, None)); + Ok(()) } fn update_submodules(&self) -> CargoResult<()> { - Ok(git!(self.location, "submodule", "update", "--init", - "--recursive", "--quiet")) + let sig = try!(git2::Signature::default(&self.repo)); + return update_submodules(&self.repo, &sig); + + fn update_submodules(repo: &git2::Repository, + sig: &git2::Signature) -> CargoResult<()> { + info!("update submodules for: {}", repo.path().display()); + + for mut child in try!(repo.submodules()).move_iter() { + try!(child.init(false)); + + // A submodule which is listed in .gitmodules but not actually + // checked out will not have a head id, so we should ignore it. 
+ let head = match child.head_id() { + Some(head) => head, + None => continue, + }; + + // If the submodule hasn't been checked out yet, we need to + // clone it. If it has been checked out and the head is the same + // as the submodule's head, then we can bail out and go to the + // next submodule. + let repo = match child.open() { + Ok(repo) => { + if child.head_id() == try!(repo.head()).target() { + continue + } + repo + } + Err(..) => { + let path = repo.path().dir_path().join(child.path()); + let url = try!(child.url().require(|| { + internal("invalid submodule url") + })); + try!(git2::Repository::clone(url, &path)) + } + }; + + // Fetch data from origin and reset to the head commit + let url = try!(child.url().require(|| { + internal("repo with non-utf8 url") + })); + let refspec = "refs/heads/*:refs/heads/*"; + let mut remote = try!(repo.remote_create_anonymous(url, refspec)); + try!(remote.fetch(sig, None)); + + let obj = try!(git2::Object::lookup(&repo, head, None)); + try!(repo.reset(&obj, git2::Hard, sig, None)); + try!(update_submodules(&repo, sig)); + } + Ok(()) + } } } - -fn git(path: &Path, cmd: ProcessBuilder) -> ProcessBuilder { - debug!("Executing {} @ {}", cmd, path.display()); - - cmd.cwd(path.clone()) -} - -fn git_inherit(path: &Path, cmd: ProcessBuilder) -> CargoResult<()> { - let cmd = git(path, cmd); - cmd.exec().chain_error(|| { - human(format!("Executing {} failed", cmd)) - }) -} - -fn git_output(path: &Path, cmd: ProcessBuilder) -> CargoResult { - let cmd = git(path, cmd); - let output = try!(cmd.exec_with_output().chain_error(|| - human(format!("Executing {} failed", cmd)))); - - Ok(to_str(output.output.as_slice()).as_slice().trim_right().to_string()) -} - -fn to_str(vec: &[u8]) -> String { - String::from_utf8_lossy(vec).into_string() -} - diff --git a/src/cargo/util/errors.rs b/src/cargo/util/errors.rs index cdafb631e..352afb356 100644 --- a/src/cargo/util/errors.rs +++ b/src/cargo/util/errors.rs @@ -7,6 +7,7 @@ use std::str; use 
docopt; use TomlError = toml::Error; use url; +use git2; pub trait CargoError: Send { fn description(&self) -> String; @@ -294,6 +295,12 @@ impl CargoError for url::ParseError { from_error!(url::ParseError) +impl CargoError for git2::Error { + fn description(&self) -> String { self.to_string() } +} + +from_error!(git2::Error) + impl CliError { pub fn new(error: S, code: uint) -> CliError { let error = human(error.as_slice().to_string()); diff --git a/src/cargo/util/to_url.rs b/src/cargo/util/to_url.rs index d130daa72..d3e8602cd 100644 --- a/src/cargo/util/to_url.rs +++ b/src/cargo/util/to_url.rs @@ -25,6 +25,14 @@ impl<'a> ToUrl for &'a str { } } +impl<'a> ToUrl for &'a Path { + fn to_url(self) -> Result { + Url::from_file_path(self).map_err(|()| { + format!("invalid path url `{}`", self.display()) + }) + } +} + fn mapper(s: &str) -> url::SchemeType { match s { "git" => url::RelativeScheme("9418"), -- 2.30.2